/* Clear workspace */
clear

************************************************************************************
* DESCRIPTION: This script imports the CSV produced by the Python step, converts
* the date to Stata format, standardizes the rating categories, aggregates the
* financial variables and builds loan-timing variables.
* The global macros logs, input and output must be defined before running it.
************************************************************************************

*************************************************************
* Load and clean data
*************************************************************

/* Start logging (close any log left open by a previous, interrupted run) */
capture log close
/* Forward slashes are used as path separators: Stata accepts them on every
   platform, whereas backslashes only work on Windows. */
log using "$logs/csv_to_date_rejected.log", replace

/* Import dataset from Python output */
import delimited "$input/reject_scr_fiben_orbis.csv", clear

/* Convert the string date (year-month-day order) to a Stata daily date */
gen time = date(date, "YMD")
format time %td

/* date() returns missing for strings it cannot parse; report how many
   non-empty dates were lost instead of letting them pass silently.
   This only prints a message, it does not stop the script. */
quietly count if missing(time) & !missing(date)
if r(N) > 0 {
    display as error "Warning: " r(N) " observation(s) have a date that could not be parsed as YMD"
}

/* Monthly date corresponding to the daily date */
gen mdate = mofd(time)
format mdate %tm

/* Rename loan-related variables for consistency */
rename loanst stloan
rename loanlt ltloan
rename eventdate outsideloandate

/*************************************************************
* Standardizing Rating Categories
*************************************************************/

/* Extract the rating category: up to four characters of the rating string,
   starting at the second character (i.e. the first character is dropped) */
gen rating10 = substr(rating, 2, 4)

/* Diagnostic cross-tabulation of raw categories against acceptance status */
tab rating10 accepted

/* Standardize rating categories: replace the "+" notation with letter
   suffixes. Values not listed below are left unchanged. */
gen rating10str = rating10
replace rating10str = "3A" if rating10str == "3++"
replace rating10str = "3B" if rating10str == "3+"
replace rating10str = "3C" if rating10str == "3"
replace rating10str = "4A" if rating10str == "4+"
replace rating10str = "4B" if rating10str == "4"
replace rating10str = "5A" if rating10str == "5+"
replace rating10str = "5B" if rating10str == "5"

/* Replace the original string rating by a labelled numeric variable.
   encode numbers the categories in alphabetical order of the strings. */
drop rating
encode rating10str, gen(rating)

/* Save cleaned dataset.
   Forward slash as path separator: works on every platform, whereas a
   backslash only works on Windows. */
save "$output/scr_fiben_reject.dta", replace

/*************************************************************
* Aggregating financial data
*************************************************************/

/* Variables that are summed within each group */
local sumvars drawn available stloan ltloan co bm bi oc

/* Grouping variables: one output row is produced per distinct combination
   of these. Every variable that is neither summed nor listed here is
   dropped by collapse. */
local groupvars ///
    date siren loantype rate maturity outsideloan accepted year rating ///
    industry activity region size outsideloandate post deltamonths r1 r1filled ///
    r2 r3 r3filled r4 r5 r5filled r6 r7 r7filled r8 r8filled r9 r9filled r10 r11 ///
    r12 r12filled r12bis r13 r13filled r14 r15 r16 r17 r_collateral ///
    r_collateral_filled r_collateral2 r_inno1 r_inno2 r_inno3 rdcostsfilled ///
    employees assets assets_filled age investment salaires va sales fiben time ///
    mdate pclopen pclliquidate ageenmois agefilled account_receivables_filled ///
    candidates ageatevent dettefiscta dettefouta inventory cash creclie dettefou ///
    dettefisc otherwc outlyingplatform

/* NOTE(review): with (sum), a group whose values are all missing gets 0
   rather than missing -- confirm this is the intended treatment. */
collapse (sum) `sumvars', by(`groupvars')

/*************************************************************
* Logarithmic Transformations
*************************************************************/

/* Natural logarithm of total assets (raw and filled versions).
   Non-positive or missing inputs yield missing values. */
gen logassets = ln(assets)
gen logassets_filled = ln(assets_filled)

/*************************************************************
* Compute Loan Timing Variables
*************************************************************/

/* Convert the outside-loan date string to a Stata daily date and extract
   its calendar components */
gen outloandate = date(outsideloandate, "YMD")
format outloandate %td
gen outloanmonth = month(outloandate)
gen outloanquarter = quarter(outloandate)
gen outloanyear = year(outloandate)

/* Same for the observation date */
gen datestata = date(date, "YMD")
format datestata %td
gen quarterdat = quarter(datestata)
gen datemonth = month(datestata)

/* Compact year-month (YYYYMM) and year-quarter (YYYYQ) identifiers of the
   outside-loan date */
gen outloantime = outloanyear * 100 + outloanmonth   // Used for matching
gen outloanqtime = outloanyear * 10 + outloanquarter

/* Number of months between the observation date and the outside-loan date
   (positive when the observation is later).
   NOTE(review): the year component comes from the dataset variable year,
   not from year(datestata) -- confirm both always agree. */
gen delta = (year - outloanyear) * 12 + (datemonth - outloanmonth)

/* Sort and organize dataset.
   The sort keys do not uniquely identify rows (several rows can share the
   same siren/date), so stable is required to keep tied rows in the same
   relative order on every run and make the saved file reproducible. */
sort siren year quarterdat delta date, stable
order siren year delta date accepted industry region size outsideloan outsideloandate

/* Save aggregated dataset.
   Forward slash as path separator: works on every platform, whereas a
   backslash only works on Windows. */
save "$output/rejected_agg.dta", replace

*************************************************************
* Industry Classification Adjustments
*************************************************************

/* Merge industries into broader categories */
replace industry = "public" if inlist(industry, "P", "Q", "R", "S")  // Public sector
replace industry = "DE" if inlist(industry, "D", "E")  // Merge D & E
replace industry = "FL" if inlist(industry, "F", "L")  // Merge F & L

/* Exclude certain industries:
     A      Agriculture
     K      Finance and insurance
     O      Public administration
     B      Industry B
     DE     Merged D & E category (not enough firms)
     0000Z  Industry with little information */
drop if inlist(industry, "A", "K", "O", "B", "DE", "0000Z")

/* Save modified dataset.
   Forward slash as path separator: works on every platform, whereas a
   backslash only works on Windows. */
save "$output/rejected_agg_industries_modified.dta", replace

/* Close log file */
log close
